#!/bin/bash

# Script version (printed by -V) and the name this script was invoked as,
# without its directory part (used as a prefix in messages).
version=2.04
myname=${0##*/}

<<'DOC'
= ltxfileinfo - print version information for a LaTeX file

= Synopsis
ltxfileinfo [options] filename

Options:
-h,--help	print this help and exit
-H,--Help	print full documentation via less and exit
-V		print this script's version and exit
-d,--date	print file's date only
-v,--version	print file's version only
-i,--info	print file's description text only
-l,--location	print file's full path only (same output as kpsewhich)
-f,--flat	output as 1 line with 4 tab-separated file, date, version and info fields
-s,--star	mark mal-formatted data with a star
-c,--color	mark mal-formatted data with red color (ANSI coloring)
-D,--debug	for debugging, keep temporary files

= Description
ltxfileinfo displays version information for LaTeX files. If no path
information is given, the file is searched using kpsewhich. As an extra,
for developers, the script will (use the |--star| or |--color| options)
check the validity of the |\Provides...| statement in the files.
The script uses code from Uwe Lück's |readprov.sty|.

Without an option, the output will be of the form:
  $ ltxfileinfo ctable.sty
  file: ctable.sty
  date: 2012/08/23
  vers: v1.24
  info: Easy, key=value directed, option-rich, typesetting of floats
  loca: /usr/local/texlive/2012/../texmf-local/tex/latex/ctable/ctable.sty

Missing information is represented by |--|:
  $ ltxfileinfo yhmath.sty
  file: yhmath.sty
  date: --
  vers: --
  info: --
  loca: /usr/local/texlive/2012/texmf-dist/tex/latex/yhmath/yhmath.sty

ltxfileinfo tries to detect (and report) mal-formatted dates and versions;
use the |--star| option to mark such fields with a star, or use the |--color|
option to color invalid fields red, if you work in a terminal with ANSI-coloring
capabilities.
These options are useful for developers who want to check the correctness
of their |\Provides...| statements. For example:

  $ ltxfileinfo -s arfonts.sty
  file: ARfonts.sty* (case difference in \Provides statement)
  date: 2006/01/01
  vers: --*
  info: Part of the Arabi package
  loca: /usr/local/texlive/2012/texmf-dist/tex/latex/arabi/arfonts.sty

We see here that the |\ProvidesPackage| statement has an incorrect first
argument and has no version information.

= Other output formats
With the |--date| option, only the file's date will be shown, unlabeled.
The |--version|, |--location|, |--info| options are treated analogously.

  $ ltxfileinfo -v chronology.sty
  v1.0

The |--flat| option prints the fields (except loca:, the last field) on one
line, unlabeled and tab-separated:
 
  $ ltxfileinfo --star --flat chronology.sty
  chronology.sty        2010/6/12*      v1.0    Horizontal timeline

The date is starred, because its format is not yyyy/mm/dd (and the |--star|
option is given.)

= Bugs

On my system, I have a total of 8695 kpsewhich-detectable files that contain a |\Provides...| statement.
I ran them all through |ltxfileinfo| and made the following summary of detected errors:

  8695  Total \Provides... containing files tested 
  
   660  \Provides... could not be interpreted; reason:
        160  Argument of \Provides... not equal to file's name
          8  Latex3 package (not handled yet)
        480  Unidentified problem with \ProvidesPackage statement
         12  \Provides... used in file without a ... extension
  
  8035  Files could be evaluated
        482  had no date
         95  had a mal-formatted date 
       2666  had no version
        357  had a mal-formatted version
        765  had a \Provides... first argument different from the filename

The .dtx files have more problems than other files:

  1345  .dtx files:
  
   320   \Provides... could not be interpreted; reason:
         81  Argument of \Provides... not equal to...
          6  Latex3 package (not handled yet)
        233  Unidentified problem with \ProvidesPackage statement
  
  1025  Files could be evaluated
         49  had no date
         15  had a mal-formatted date 
         99  had no version
         52  had a mal-formatted version
        468  had a \Provides... first argument different from the filename

= Author and copyright
Author	Wybo Dekker
Email	U{Wybo@dekkerdocumenten.nl}{wybo@dekkerdocumenten.nl}
License	Released under the U{www.gnu.org/copyleft/gpl.html}{GNU General Public License}
DOC

    # Report a fatal error and stop: writes "<script name>: <message>" to
    # stderr, wrapped in the $Err / $Nor color sequences, then exits with 1.
    # All arguments together form the message.
    die() {
       echo -e "$myname: $Err${*}$Nor" >&2
       exit 1
    }
   # Print the short usage text and exit. The text is the Synopsis section
   # of the documentation embedded in this script: its heading is replaced
   # by "Usage:" and the heading of the following section is dropped.
   help() {
      sed -n '/^= Synopsis/,/^= /p' "$0" |
         sed '1s/.*/Usage:/;/^= /d'
      exit
   }
# Show the complete documentation embedded in this script (everything between
# the heredoc opener and its closing DOC line, both excluded) in the less
# pager, then exit.
helpall() {
   sed -n '/^<<.DOC.$/,/^DOC$/p' "$0" |
      sed -n '1d;$d;p' |
      less
   exit
}
# Print this script's version (global $version) on one line and exit.
# The value is printed verbatim: quoting prevents word splitting and
# pathname expansion of the version string.
version() {
   printf '%s\n' "$version"
   exit
}
# Developer helper behind the -I option: if an `instscript` command is
# available, run it on this script with --zip --pdf --markdown; then exit.
# The exit status is that of the last command run, so it is non-zero when
# instscript is not available.
install() {
   # `command -v` is a shell builtin; unlike `which` it needs no external
   # program and behaves the same on every system.
   command -v instscript >/dev/null 2>&1 &&
      instscript --zip --pdf --markdown "$myname"
   exit
}

# Color sequences used by die(); they are stored with a literal "\e" and are
# turned into escape characters by `echo -e`.
# (The "]" after each comment presumably balances the "[" for editors.)
Err='\e[31;1m' # light red    ]
Nor='\e[0m'    # reset color  ]

# Refuse to run under a bash older than version 4.
(( BASH_VERSINFO[0] >= 4 )) || die "Need bash version >= 4 (you have $BASH_VERSION)"

# Strip leading and trailing whitespace from a string.
# All arguments are joined into one string first; the result is printed
# without a trailing newline.
trim() {
   local text="$*" lead trail
   lead=${text%%[![:space:]]*}    # the leading run of whitespace
   text=${text#"$lead"}
   trail=${text##*[![:space:]]}   # the trailing run of whitespace
   text=${text%"$trail"}
   echo -n "$text"
}

# Mark a string as mal-formatted (--star and --color options): print the
# arguments, joined into one string, between the global $mark1 and $mark2.
# When neither option was given both are empty and the text is unchanged.
mark() {
   local text="$*"
   echo "${mark1}${text}${mark2}"
}

# Special treatment for .mbs and .bst files.
# Reads the file named by the global $loca and stores a log-style record
# "File: <name> <text between the square brackets>" in the global $out,
# built from the first line that contains \ProvidesFile{ .
# If there is no such line, $out becomes "File: ".
function dombsbst {
   # -F: search for the literal string, so the backslash really has to be
   #     there (as a regex, '\P' is a stray escape that matches a plain 'P');
   # -m1: look at the first \ProvidesFile only.
   out="File: "$(grep -m1 -F '\ProvidesFile{' "$loca" |
      sed 's/^[ %]*\\ProvidesFile{//;s/}\[/ /;s/\]$//')
}

# Unpack the fields of a "File: <name> [<date>] [<version>] [<info> ...]"
# record that has been split into words.
# Arguments: $1      the "File:" label (discarded)
#            $2      the file name
#            $3...   optional date, version and description words
# Globals written: file, date, vers, info; a missing field is set to "--".
# The loop variable and the "version found" flag are local, so calling this
# function does not clobber globals of the same name.
function extract {
  local prefix re fixed=false

  shift # remove File:
  file=$1
  shift # remove filename

  # date: anything that looks like digits/digits/digits
  if [[ -z $1 ]]; then
     date=--; vers=--; info=--
     return
  fi
  if [[ $1 =~ ^[0-9]+/[0-9]+/[0-9]+ ]]; then
     date=$1
     shift
  else
     date=--
  fi

  # version, regular form: optional v, a digit, then digits and dots,
  # optionally followed by lower case letters
  if [[ -z $1 ]]; then
     vers=--; info=--
     return
  fi
  if [[ $1 =~ ^v?[[:digit:]][.[:digit:]]+[a-z]* ]]; then
     vers=$1
     shift
     info="${*:---}"
     return
  fi

  # sometimes the version is one word with a prefix, like v.1.3 or ver:1.3;
  # the prefix is used as a regular expression, so its dot matches any character
  for prefix in version ver. ver: ver v. V v; do
     re="${prefix}[[:digit:]][.[:digit:]]*[a-z]*"
     if [[ $1 =~ $re ]]; then
        vers=$1
        shift
        fixed=true
        break
     fi
  done

  # sometimes the version is reported as two words, like "v 1.3" or "ver: 1.3";
  # here the prefix has to match literally
  if ! $fixed; then
     for prefix in version ver: ver v. V v; do
        if [[ $1 == "$prefix" && $2 =~ [[:digit:]][.[:digit:]]+[a-z]* ]]; then
           vers="$1 $2"
           shift 2
           fixed=true
           break
        fi
     done
  fi

  # whatever is left is the description
  info="${*:---}"
  $fixed || vers=--
}

# Display the results.
# Globals read:    file date vers info loca  the fields to report
#                  arg                       the file argument as given by the user
#                  key                       if set, name of the single field to print
#                  flat                      true/false: one tab-separated line
#                  mark2                     non-empty when --star or --color is active
# Globals changed: file, date and vers get their marks and remarks appended.
# Uses mark() to mark mal-formatted fields; needs `stat -c` and `date -d`.
function display {
   local mtime wrong add

   # file should be equal to the file name part of the argument; a directory
   # given on the command line is not part of the \Provides... argument
   if [[ $file != "${arg##*/}" && -n $mark2 ]]; then
      file="$(mark "$file") (case difference in \\Provides statement)"
   fi

   # to be correct, date must be of the form yyyy/mm/dd
   if [[ $date == -- ]]; then
      # %y is the time of the last data modification (%z would be the time
      # of the last status change); the fractional seconds and the time zone
      # are cut off
      mtime=$(stat -c %y "$loca")
      date="$(mark "$date")  (file modification date: ${mtime%%.*})"
   elif [[ -n $mark2 ]]; then
      wrong=false add=''
      if [[ ! $date =~ ^[[:digit:]]{4}/[[:digit:]]{2}/[[:digit:]]{2}$ ]]; then
         add+="  (wrong format)"
         wrong=true
      fi
      # even if it matches, the date must be valid; rely on the exit status
      # of date(1), its error message depends on the locale
      if ! date -d "$date" >/dev/null 2>&1; then
         add+="  (invalid date)"
         wrong=true
      fi
      if $wrong; then date="$(mark "$date")$add"; fi
   fi

   # version should be 1.2 or 1.2.3, maybe prefixed with v and suffixed with
   # zero or more lower case letters; the separators must be real dots
   if [[ ! $vers =~ ^v?[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?[[:lower:]]*$ ]]; then
      vers=$(mark "$vers")
   fi

   # printf '%s' prints the fields verbatim: no word splitting, no pathname
   # expansion and no interpretation of backslashes that came from the file
   if [[ -n $key ]]; then
      printf '%s\n' "${!key}"
   elif $flat; then
      printf '%s\t%s\t%s\t%s\n' "$file" "$date" "$vers" "$info"
   else
      printf '%s: %s\n' \
         file "$file" \
         date "$date" \
         vers "$vers" \
         info "$info" \
         loca "$loca"
   fi
}

# Parse the command line with getopt(1); the long options need the enhanced
# (util-linux) implementation. getopt reports invalid options itself, under
# the name given with -n.
if ! options=$(getopt \
   -n "$myname" \
   -o hHVIdvlifcsD \
   -l help,Help,date,version,location,info,flat,star,color,debug -- "$@"
); then exit 1; fi
eval set -- "$options"

# Defaults. key, mark1 and mark2 are cleared explicitly so that variables of
# the same name inherited from the environment cannot influence the output.
debug=false flat=false key='' mark1='' mark2=''
while [ $# -gt 0 ]; do
   case $1 in
   (-h|--help)     # print this help and exit
                   help
                   ;;
   (-H|--Help)     # print full documentation via less and exit
                   helpall
                   ;;
   (-V)            # print this script's version and exit
                   version
                   ;;
   (-d|--date)     # print file's date only
                   key=date
                   ;;
   (-v|--version)  # print file's version only
                   key=vers
                   ;;
   (-i|--info)     # print file's description text only
                   key=info
                   ;;
   (-l|--location) # print file's full path only (same output as kpsewhich)
                   key=loca
                   ;;
   (-f|--flat)     # output as 1 line with 4 tab-separated file, date, version and info fields
                   flat=true
                   ;;
   (-s|--star)     # mark mal-formatted data with a star
                   mark1=''
                   mark2='*'
                   ;;
   (-c|--color)    # mark mal-formatted data with red color (ANSI coloring)
                   # $'\e' makes the ESC byte that starts each sequence explicit
                   mark1=$'\e[1;31m'
                   mark2=$'\e[0m'
                   ;;
   (-D|--debug)    # for debugging, keep temporary files
                   debug=true
                   ;;
   (-I)            # undocumented developer option, see install()
                   install
                   ;;
   (--)            # end of options; what follows is the file argument
                   shift
                   break
                   ;;
   (*)             break
                   ;;
   esac
   shift
done

# Locate the file to examine. An argument that contains a slash is taken as
# a path to an existing file; a bare file name is looked up with kpsewhich;
# without an argument the usage text is printed.
arg=$1
case $arg in
(*/*)
   loca=$arg
   [[ -e $loca ]] || die "$loca: file does not exist"
   # make the path absolute: the script changes to a temporary directory
   # later on, which would invalidate a relative path
   [[ $loca == /* ]] || loca=$PWD/$loca
   ;;
('')
   help
   ;;
(*)
   loca=$(kpsewhich "$arg") || die "$arg: not found by kpsewhich"
   ;;
esac

prov=$(grep '\\Provides' "$loca") || die "no \\Provides..."
[[ $prov =~ ExplPackage ]] && die 'Latex3 package (not handled yet)'

# the file must contain a \ProvidesXXX statement, where XXX is Class, Package, or File:
[[ $prov =~ \\Provides(Class|Package|File)[[:space:]]*\{ ]] ||
   die "Found no \\ProvidesClass/Package/File statement"

# readprov.sty does not work on .mbs and .bst files: special treatment;
# \ProvidesFile statements in them mostly refer to merlin.mbs, or other names
[[ $arg =~ \.(mbs|bst)$ ]] && dombsbst

# Work in a private temporary directory that holds a copy of the file.
# With --debug the directory is kept and its name is printed; otherwise it
# is removed when the script exits.
dir=$(mktemp -d -t "$myname.XXXXXXXXXX") || die "Could not create a temporary directory"
if $debug; then
   echo "Running in $dir"
else
   # clean up on every exit; HUP, INT and TERM end the script (and so trigger
   # the clean-up) instead of letting it continue in a deleted directory
   trap 'rm -rf -- "$dir"' 0
   trap 'exit 1' 1 2 15
fi
cp "$loca" "$dir"
cd "$dir" || die "Could not cd to $dir"

# Write a small LaTeX driver, ltxfileinfo.tex, into the current directory.
# The following code is mostly from Uwe Lueck's readprov.sty: \ReadFileInfos
# inputs the file with \ProvidesClass and \ProvidesPackage mapped onto
# \ProvidesFile, and with \endinput issued right after the \Provides...
# statement. The path in $loca is spliced into the TeX source between the
# two single-quoted parts of the printf argument.
printf %s '
\makeatletter
\def\GetFileInfo#1{%
  \def\filename{#1}%
  \def\@tempb##1 ##2 ##3\relax##4\relax{%
    \def\filedate{##1}%
    \def\fileversion{##2}%
    \def\fileinfo{##3}}%
    \read@file@info\@tempb{#1}} 
\newcommand*{\read@file@info}[2]{%
  \expandafter \expandafter \expandafter
    #1\csname ver@#2\endcsname \relax? ? \relax\relax}
\newcommand*{\ReadFileInfos}[1]{%
  \begingroup
    \let\RP@@provfile\@providesfile
    \def\@providesfile##1[##2]{\RP@@provfile{##1}[{##2}]\endinput}%
    \def\ProvidesClass  ##1{\ProvidesFile{##1.\@clsextension}}%
    \def\ProvidesPackage##1{\ProvidesFile{##1.\@pkgextension}}%
    \@for\@tempa:=#1\do{%
      \edef\@tempa{\expandafter\read@no@spaces\@tempa\@nil}%
      \input{\@tempa}%
      \global\let\@gtempa\@tempa}%
  \endgroup
  \GetFileInfo\@gtempa%
}
\def\read@no@spaces#1#2\@nil{#1#2}%
\def\NeedsTeXFormat#1{\expandafter\@needsformat}
\ReadFileInfos{'"$loca"'}
\endinput
' > ltxfileinfo.tex

# Run the driver in batch mode. All terminal output is discarded and the exit
# status is not checked; the script goes on to read ltxfileinfo.log.
pdflatex -interaction=batchmode ltxfileinfo.tex >& /dev/null

# Search ltxfileinfo.log for the "File: <name> ..." record of our file and
# collect it in $out. The match ignores case and uses the argument without
# its directory and extension; if several lines match, the last one wins.
base=${arg##*/}        # strip the path
base=${base%.*}        # strip the extension
IFS=                   # have read keep all whitespace in a line
shopt -s nocasematch   # MS people don't pay attention to case differences in file names...
while read -r line; do
   [[ $line =~ ^File:.$base ]] || continue
   out=$line
   # a line of exactly 79 characters is continued on the next line of the log
   while (( ${#line} == 79 )); do
      read -r line
      out+=$line
   done
done <ltxfileinfo.log
shopt -u nocasematch   # case matters again
IFS=' '                # from here on words are split on spaces

# If the \Provides statement is not recognized ($out is still empty), try to
# find out why, and die with a message that names the cause.
if [[ -z $out ]]; then
   # kind (Class, Package or File) and argument of the first \Provides...
   # statement that has a complete {...} argument on one line
   read -r provtype provarg <<<"$(sed -n '/\\Provides\(Class\|Package\|File\)\s*{[^}]*}/s/.*\(Class\|Package\|File\)\s*{\([^}]*\)}.*/\1 \2/p' "$loca")"

   # no argument found, or an argument that contains a backslash
   if [[ -z $provarg || $provarg =~ \\ ]]; then
      die 'Unidentified problem with \ProvidesPackage statement'
   fi

   case $provtype in
   (Package)
      if [[ ! $arg =~ \.(sty|dtx)$ ]]; then
         die '\ProvidesPackage used in file without a .sty extension'
      elif [[ $provarg =~ \.sty$ ]]; then
         die "Argument of \\ProvidesPackage ($provarg) should not contain .sty extension"
      elif [[ $provarg != "$base" ]]; then
         die "Argument of \\ProvidesPackage ($provarg) not equal to file's base name ($base)"
      else
         die 'Unidentified problem with \ProvidesPackage statement'
      fi
      ;;
   (Class)
      if [[ ! $arg =~ \.(cls|dtx)$ ]]; then
         die '\ProvidesClass used in file without a .cls extension'
      elif [[ $provarg =~ \.cls$ ]]; then
         die "Argument of \\ProvidesClass ($provarg) should not contain .cls extension"
      elif [[ $provarg != "$base" ]]; then
         die "Argument of \\ProvidesClass ($provarg) not equal to file's base name ($base)"
      else
         die 'Unidentified problem with \ProvidesPackage statement'
      fi
      ;;
   (File)
      if [[ $arg != "$provarg" ]]; then
         die "Argument of \\ProvidesFile ($provarg) not equal to file's name ($arg)"
      else
         die 'Unidentified problem with \ProvidesPackage statement'
      fi
      ;;
   (*)
      # report the kind of statement that was found, not its argument
      die "Found \\Provides$provtype statement - cannot handle it"
      ;;
   esac
fi

# Split the collected record into words on spaces and report it.
# read -a splits without pathname expansion, so a word such as "*" in the
# description is not replaced by the names of the files in the directory.
IFS=' ' read -r -a out <<<"$out"
extract "${out[@]}"
display
